library(readr)
library(dplyr)
# Read the WDI data file into a tibble, then inspect it two ways: printing the
# tibble shows the first rows, glimpse() lists every column with its type.
dat <- read_csv(file = "data/WDIData.csv")
dat
glimpse(dat)
## Observations: 409,992
## Variables: 63
## $ `Country Name` <chr> "Arab World", "Arab World", "Arab World", "Ar...
## $ `Country Code` <chr> "ARB", "ARB", "ARB", "ARB", "ARB", "ARB", "AR...
## $ `Indicator Name` <chr> "2005 PPP conversion factor, GDP (LCU per int...
## $ `Indicator Code` <chr> "PA.NUS.PPP.05", "PA.NUS.PRVT.PP.05", "EG.CFT...
## $ `1960` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1961` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1962` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1963` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1964` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1965` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1966` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1967` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1968` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1969` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1970` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1971` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1972` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1973` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1974` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1975` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1976` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1977` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1978` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1979` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1980` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1981` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1982` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1983` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1984` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1985` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1986` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1987` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1988` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1989` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `1990` <dbl> NA, NA, NA, 76.14870, 59.35809, NA, NA, NA, N...
## $ `1991` <dbl> NA, NA, NA, 76.56355, 60.17835, NA, NA, NA, N...
## $ `1992` <dbl> NA, NA, NA, 74.06687, 61.29522, NA, NA, NA, N...
## $ `1993` <dbl> NA, NA, NA, 74.84607, 62.10662, NA, NA, NA, N...
## $ `1994` <dbl> NA, NA, NA, 75.30393, 62.96733, NA, NA, NA, N...
## $ `1995` <dbl> NA, NA, NA, 76.04233, 63.72973, NA, NA, NA, N...
## $ `1996` <dbl> NA, NA, NA, 76.76857, 64.70132, NA, NA, NA, N...
## $ `1997` <dbl> NA, NA, NA, 77.14708, 64.95271, NA, NA, NA, N...
## $ `1998` <dbl> NA, NA, NA, 77.98579, 66.38775, NA, NA, NA, N...
## $ `1999` <dbl> NA, NA, NA, 78.55301, 64.08934, NA, NA, NA, N...
## $ `2000` <dbl> NA, NA, 76.59969, 79.23617, 65.18909, NA, NA,...
## $ `2001` <dbl> NA, NA, 77.44310, 79.76829, 65.52742, NA, NA,...
## $ `2002` <dbl> NA, NA, 78.24495, 80.22959, 66.22084, NA, NA,...
## $ `2003` <dbl> NA, NA, 79.01927, 80.96835, 67.48075, NA, NA,...
## $ `2004` <dbl> NA, NA, 79.79252, 82.62879, 67.51666, 93.0769...
## $ `2005` <dbl> NA, NA, 80.57069, 83.35198, 70.09179, 93.0009...
## $ `2006` <dbl> NA, NA, 81.34937, 83.78790, 70.26531, 93.2334...
## $ `2007` <dbl> NA, NA, 82.11228, 84.39166, 71.23760, 93.4736...
## $ `2008` <dbl> NA, NA, 82.83193, 85.04225, 72.29232, 95.0940...
## $ `2009` <dbl> NA, NA, 83.47462, 84.65534, 71.76534, 94.7233...
## $ `2010` <dbl> NA, NA, 84.00608, 85.95535, 73.73427, 95.2998...
## $ `2011` <dbl> NA, NA, 84.41615, 86.39231, 74.50072, 95.3942...
## $ `2012` <dbl> NA, NA, 84.73457, 86.84697, 75.21850, 95.4750...
## $ `2013` <dbl> NA, NA, 85.00364, 87.60496, 76.71225, 95.6688...
## $ `2014` <dbl> NA, NA, 85.24497, 88.03912, 77.40727, 96.0886...
## $ `2015` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `2016` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ `2017` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ X63 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
library(tidyr)
# Keep the "GDP growth (annual %)" indicator for the three economies compared
# below.
#
# Country membership is tested with %in%. Comparing the column to a length-3
# vector with `==` recycles that vector down the rows, so each row is compared
# with only one of the three names and a match depends on where the row
# happens to sit in the table (recent dplyr versions reject the size mismatch
# outright). %in% checks every row against the whole set and never yields NA.
dc <- dat %>%
  filter(
    `Indicator Name` == "GDP growth (annual %)",
    `Country Name` %in% c("United Kingdom", "United States", "European Union")
  )
glimpse(dc)
## Observations: 3
## Variables: 63
## $ `Country Name` <chr> "European Union", "United Kingdom", "United S...
## $ `Country Code` <chr> "EUU", "GBR", "USA"
## $ `Indicator Name` <chr> "GDP growth (annual %)", "GDP growth (annual ...
## $ `Indicator Code` <chr> "NY.GDP.MKTP.KD.ZG", "NY.GDP.MKTP.KD.ZG", "NY...
## $ `1960` <dbl> NA, NA, NA
## $ `1961` <dbl> 5.59226, 2.57357, 2.30000
## $ `1962` <dbl> 5.031483, 1.327993, 6.100000
## $ `1963` <dbl> 5.147830, 3.956241, 4.400000
## $ `1964` <dbl> 5.567709, 5.038282, 5.800000
## $ `1965` <dbl> 4.393252, 2.788977, 6.400000
## $ `1966` <dbl> 4.298803, 2.049311, 6.500000
## $ `1967` <dbl> 4.456442, 2.310965, 2.500000
## $ `1968` <dbl> 5.064403, 3.984738, 4.800000
## $ `1969` <dbl> 5.787202, 2.052446, 3.100000
## $ `1970` <dbl> 5.608831, 5.957661, 3.206807
## $ `1971` <dbl> 3.639819, 3.479335, 3.295477
## $ `1972` <dbl> 4.714796, 4.294470, 5.263263
## $ `1973` <dbl> 6.105374, 6.516075, 5.643125
## $ `1974` <dbl> 2.2240886, -2.4726740, -0.5171546
## $ `1975` <dbl> -0.7789266, -1.4883432, -0.1976785
## $ `1976` <dbl> 4.598848, 2.921780, 5.386090
## $ `1977` <dbl> 2.813866, 2.463214, 4.608597
## $ `1978` <dbl> 3.224537, 4.195285, 5.561685
## $ `1979` <dbl> 3.830662, 3.735497, 3.175691
## $ `1980` <dbl> 1.4696051, -2.0411644, -0.2445962
## $ `1981` <dbl> 0.3204955, -0.7789262, 2.5944704
## $ `1982` <dbl> 0.9862215, 2.0156720, -1.9108911
## $ `1983` <dbl> 1.834572, 4.220573, 4.632457
## $ `1984` <dbl> 2.453812, 2.274567, 7.259087
## $ `1985` <dbl> 2.614133, 4.187306, 4.238738
## $ `1986` <dbl> 2.670649, 3.153252, 3.511614
## $ `1987` <dbl> 2.944893, 5.359485, 3.461748
## $ `1988` <dbl> 4.409114, 5.787400, 4.203972
## $ `1989` <dbl> 3.725087, 2.582895, 3.680524
## $ `1990` <dbl> 2.971492, 0.716883, 1.919370
## $ `1991` <dbl> 1.42471574, -1.11898405, -0.07408453
## $ `1992` <dbl> 1.0580811, 0.3598724, 3.5553961
## $ `1993` <dbl> -0.1512518, 2.5070137, 2.7458567
## $ `1994` <dbl> 2.836740, 3.885075, 4.037643
## $ `1995` <dbl> 2.686671, 2.506373, 2.718976
## $ `1996` <dbl> 1.996811, 2.548734, 3.795881
## $ `1997` <dbl> 2.772792, 3.127178, 4.487026
## $ `1998` <dbl> 2.977477, 3.190779, 4.449911
## $ `1999` <dbl> 3.032757, 3.283348, 4.685200
## $ `2000` <dbl> 3.881161, 3.744962, 4.092176
## $ `2001` <dbl> 2.2360380, 2.7261073, 0.9759818
## $ `2002` <dbl> 1.336565, 2.397248, 1.786128
## $ `2003` <dbl> 1.334203, 3.466239, 2.806776
## $ `2004` <dbl> 2.594107, 2.527877, 3.785743
## $ `2005` <dbl> 2.084431, 2.972096, 3.345216
## $ `2006` <dbl> 3.357783, 2.503009, 2.666626
## $ `2007` <dbl> 3.086410, 2.555819, 1.778570
## $ `2008` <dbl> 0.4581657, -0.6272052, -0.2916215
## $ `2009` <dbl> -4.383413, -4.327738, -2.775530
## $ `2010` <dbl> 2.153152, 1.915162, 2.531921
## $ `2011` <dbl> 1.665908, 1.509062, 1.601455
## $ `2012` <dbl> -0.4718219, 1.3130186, 2.2240309
## $ `2013` <dbl> 0.2223498, 1.9110784, 1.6773315
## $ `2014` <dbl> 1.669613, 3.070484, 2.370458
## $ `2015` <dbl> 2.202898, 2.194229, 2.596148
## $ `2016` <dbl> 1.873942, 1.806018, 1.615656
## $ `2017` <dbl> NA, NA, NA
## $ X63 <chr> NA, NA, NA
# List the column names with their positions to see where the year columns sit.
colnames(dc)
## [1] "Country Name" "Country Code" "Indicator Name" "Indicator Code"
## [5] "1960" "1961" "1962" "1963"
## [9] "1964" "1965" "1966" "1967"
## [13] "1968" "1969" "1970" "1971"
## [17] "1972" "1973" "1974" "1975"
## [21] "1976" "1977" "1978" "1979"
## [25] "1980" "1981" "1982" "1983"
## [29] "1984" "1985" "1986" "1987"
## [33] "1988" "1989" "1990" "1991"
## [37] "1992" "1993" "1994" "1995"
## [41] "1996" "1997" "1998" "1999"
## [45] "2000" "2001" "2002" "2003"
## [49] "2004" "2005" "2006" "2007"
## [53] "2008" "2009" "2010" "2011"
## [57] "2012" "2013" "2014" "2015"
## [61] "2016" "2017" "X63"
# Keep the country label, the indicator label and the yearly columns 1960-2016.
# The years are selected by name rather than by position (5:61), so a change
# in column order cannot silently shift the range.
dc1 <- select(dc, "Country Name", "Indicator Name", `1960`:`2016`)

# Reshape wide -> long: one row per country and year.
# "Indicator Name" is dropped before gathering. If it is gathered together
# with the year columns, its text values force the whole value column to
# character and add three junk rows that then have to be deleted by hand and
# the numbers re-parsed from text. na.rm = TRUE drops country/year cells that
# are NA.
dc2 <- dc1 %>%
  select(-`Indicator Name`) %>%
  gather(
    key = "Year",
    value = "GDP growth (annual %)",
    -`Country Name`,
    na.rm = TRUE
  )
dc2

# gather() builds Year from the column names, so it arrives as character.
# The value column is already numeric and needs no conversion.
dc2$Year <- as.numeric(dc2$Year)
class(dc2$Year)
## [1] "numeric"
class(dc2$`GDP growth (annual %)`)
## [1] "numeric"
glimpse(dc2)
## Observations: 168
## Variables: 3
## $ `Country Name` <chr> "European Union", "United Kingdom", "U...
## $ Year <dbl> 1961, 1961, 1961, 1962, 1962, 1962, 19...
## $ `GDP growth (annual %)` <dbl> 5.592260, 2.573570, 2.300000, 5.031483...
library(rbokeh)
# First version of the chart: a 600 x 350 figure with one layer of thick,
# half-transparent lines drawn from dc2, with "Country Name" passed as the
# colour.
p <- figure(
  title = "GDP Growth (%) USA, EU, UK",
  width = 600,
  height = 350,
  legend_location = "top_right",
  logo = NULL
) %>%
  ly_lines(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "Country Name",
    width = 5,
    alpha = 0.5
  )
p
## Congratulations, your first plot! But it is still not publication quality, so let's customize it some more:
## - We will add points too.
## - Make them black and semi-transparent (using alpha transparency).
library(rbokeh)
# Second version: the same line layer, plus a layer of small, black,
# half-transparent points on top of it.
q <- figure(
  title = "GDP Growth (%) USA, EU, UK",
  width = 600,
  height = 350,
  legend_location = "top_right",
  logo = NULL
) %>%
  ly_lines(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "Country Name",
    width = 5,
    alpha = 0.5
  ) %>%
  ly_points(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "black",
    size = 4,
    alpha = 0.5
  )
q
## Not bad! But it is still not publication quality. What about the legend? (It is covering our points.)
## - Let's make it transparent too (pipe it in via theme_legend()).
## - Let's change the default font too (everybody ♥ Garamond, right?).
library(rbokeh)
# Third version: lines and points as before, plus legend styling (thin border,
# mostly transparent background, small bold Garamond labels).
r <- figure(
  title = "GDP Growth (%) USA, EU, UK",
  width = 600,
  height = 350,
  legend_location = "top_right",
  logo = NULL
) %>%
  ly_lines(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "Country Name",
    width = 5,
    alpha = 0.5
  ) %>%
  ly_points(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "black",
    size = 4,
    alpha = 0.5
  ) %>%
  theme_legend(
    border_line_width = 1,
    background_fill_alpha = 0.1,
    label_text_font = "Garamond",
    label_text_font_size = "8pt",
    label_text_font_style = "bold",
    label_text_align = "left"
  )
r
## Getting better! But it is still not publication quality. What about the title?
## - Let's make it bigger and set it in Garamond too (pipe it in via theme_title()).
library(rbokeh)
# Fourth version: adds title styling (centred, 14pt Garamond) to the styled
# legend, lines and points.
s <- figure(
  title = "GDP Growth (%) USA, EU, UK",
  width = 600,
  height = 350,
  legend_location = "top_right",
  logo = NULL
) %>%
  ly_lines(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "Country Name",
    width = 5,
    alpha = 0.5
  ) %>%
  ly_points(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "black",
    size = 4,
    alpha = 0.5
  ) %>%
  theme_legend(
    border_line_width = 1,
    background_fill_alpha = 0.1,
    label_text_font = "Garamond",
    label_text_font_size = "8pt",
    label_text_font_style = "bold",
    label_text_align = "left"
  ) %>%
  theme_title(
    text_font = "Garamond",
    text_font_size = "14pt",
    text_align = "center",
    text_baseline = "bottom"
  )
s
## Definitely better! But it is still not publication quality. What about the axes?
## - Let's make the axis labels bold and Garamond too (pipe it in via theme_axis()).
## - Let's change the major_label (tick label) font too (everybody still ♥ Garamond, right?).
library(rbokeh)
# Fifth version: adds axis styling (bold Garamond for both the axis labels and
# the major labels) on top of the legend and title styling.
# NOTE(review): the variable name `t` shadows base::t(); it is kept unchanged
# here so later references to `t` still resolve to this plot.
t <- figure(
  title = "GDP Growth (%) USA, EU, UK",
  width = 600,
  height = 350,
  legend_location = "top_right",
  logo = NULL
) %>%
  ly_lines(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "Country Name",
    width = 5,
    alpha = 0.5
  ) %>%
  ly_points(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "black",
    size = 4,
    alpha = 0.5
  ) %>%
  theme_legend(
    border_line_width = 1,
    background_fill_alpha = 0.1,
    label_text_font = "Garamond",
    label_text_font_size = "8pt",
    label_text_font_style = "bold",
    label_text_align = "left"
  ) %>%
  theme_title(
    text_font = "Garamond",
    text_font_size = "14pt",
    text_align = "center",
    text_baseline = "bottom"
  ) %>%
  theme_axis(
    axis_label_text_font = "Garamond",
    axis_label_text_font_size = "12pt",
    axis_label_text_font_style = "bold",
    major_label_text_font = "Garamond",
    major_label_text_font_size = "10pt",
    major_label_text_font_style = "bold"
  )
t
## Nearly there! But it is still not publication quality. This is growth, right? How about making positive and negative growth visually distinct?
## - rbokeh lets us do this with ly_abline(), so let's pipe it in with a = 0 and b = 0, which draws a horizontal line at zero.
library(rbokeh)
library(htmlwidgets)
# Final version: an explicit toolbar, the fully styled lines/points/legend/
# title/axes, a thin black reference line from ly_abline(a = 0, b = 0), and a
# lasso-select tool. The widget is then displayed and written to an HTML file.
u <- figure(
  title = "GDP Growth (%) USA, EU, UK",
  width = 600,
  height = 350,
  legend_location = "top_right",
  logo = NULL,
  tools = c("pan", "wheel_zoom", "box_zoom", "box_select", "reset", "resize")
) %>%
  ly_lines(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "Country Name",
    width = 5,
    alpha = 0.5
  ) %>%
  ly_points(
    x = "Year",
    y = "GDP growth (annual %)",
    data = dc2,
    col = "black",
    size = 4,
    alpha = 0.5
  ) %>%
  theme_legend(
    border_line_width = 1,
    background_fill_alpha = 0.1,
    label_text_font = "Garamond",
    label_text_font_size = "8pt",
    label_text_font_style = "bold",
    label_text_align = "left"
  ) %>%
  theme_title(
    text_font = "Garamond",
    text_font_size = "14pt",
    text_align = "center",
    text_baseline = "bottom"
  ) %>%
  theme_axis(
    axis_label_text_font = "Garamond",
    axis_label_text_font_size = "12pt",
    axis_label_text_font_style = "bold",
    major_label_text_font = "Garamond",
    major_label_text_font_size = "10pt",
    major_label_text_font_style = "bold"
  ) %>%
  ly_abline(
    a = 0,
    b = 0,
    v = NULL,
    h = NULL,
    coef = NULL,
    color = "black",
    width = 1,
    type = 1,
    legend = NULL,
    visible = TRUE
  ) %>%
  tool_lasso_select()
u

# Write the finished widget to an HTML file.
saveWidget(u, file = "rbokeh001.html")